4. Pandas - plotting



In [2]:

    
%pylab inline
from pandas import Series, DataFrame
import pandas as pd









    



Populating the interactive namespace from numpy and matplotlib

pandas中的繪圖函數

線型圖



In [3]:

    
# Calling plot() on a Series or a DataFrame draws a line chart by default.
s = Series(
    data=np.cumsum(np.random.randn(10)),  # running total of 10 standard-normal draws
    index=np.arange(0, 100, 10),          # labels 0, 10, ..., 90
)
s.plot()









    Out[3]:





<matplotlib.axes._subplots.AxesSubplot at 0x60f05f8>



In [4]:

    
# The Series index is used as the subplot's x-axis; pass use_index = False to disable this.
# X-axis ticks and limits can be adjusted with the xticks and xlim options.
# Y-axis ticks and limits can be adjusted with the yticks and ylim options.



In [5]:

    
# DataFrame.plot() draws one line per column on a single subplot and
# creates the legend automatically.
df = DataFrame(
    data=np.cumsum(np.random.randn(10, 4), axis=0),  # cumulative sum down each column
    index=np.arange(0, 100, 10),
    columns=['A', 'B', 'C', 'D'],
)
df.tail(3)



In [6]:

    
# 'line' is the default kind, so this is the same chart as a bare df.plot().
df.plot(kind='line')









    Out[6]:





<matplotlib.axes._subplots.AxesSubplot at 0x6116358>

柱狀圖



In [7]:

    
# Pass kind = 'bar' (vertical) or kind = 'barh' (horizontal) to draw a bar chart.
# The Series / DataFrame index labels the x-axis for 'bar' and the y-axis for 'barh'.

# Two stacked subplots: the vertical chart on top, the horizontal one below.
fig, axes = plt.subplots(2, 1)
# One label per value: the 16 letters a..p in alphabetical order.
data = Series(np.random.rand(16), index = list('abcdefghijklmnop'))
data.plot(ax = axes[0], kind = 'bar')
data.plot(ax = axes[1], kind = 'barh')









    Out[7]:





<matplotlib.axes._subplots.AxesSubplot at 0x9607630>



In [8]:

    
# Each row of this DataFrame forms one group of values.
# The columns index carries the name 'Genus'.
df = DataFrame(
    data=np.random.rand(6, 4),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'),
)
df



In [9]:

    
# Each DataFrame row becomes one group of bars.
# The name attribute of the columns index is used as the legend title.
df.plot.bar()









    Out[9]:





<matplotlib.axes._subplots.AxesSubplot at 0x96a42b0>



In [10]:

    
# stacked = True draws a stacked bar chart (horizontal here).
df.plot.barh(stacked=True)









    Out[10]:





<matplotlib.axes._subplots.AxesSubplot at 0x97a4ef0>



In [11]:

    
s = Series(
    data=[2, 3, 5, 2, 5, 6, 7, 8, 9, 10, 13,
          2, 3, 4, 7, 8, 9, 0, 0, 2, 2, 1]
)

# Series.value_counts() tallies how often each number occurs;
# the tally is plotted directly as a bar chart.
vc = s.value_counts()
vc.plot.bar()









    Out[11]:





<matplotlib.axes._subplots.AxesSubplot at 0x98a3320>



In [12]:

    
# Load the tips dataset and preview its first five rows.
tips = pd.read_csv('../data/tips.csv')
tips.head(5)









    Out[12]:






  
    
      
      total_bill
      tip
      sex
      smoker
      day
      time
      size
    
  
  
    
      0
      16.99
      1.01
      Female
      No
      Sun
      Dinner
      2
    
    
      1
      10.34
      1.66
      Male
      No
      Sun
      Dinner
      3
    
    
      2
      21.01
      3.50
      Male
      No
      Sun
      Dinner
      3
    
    
      3
      23.68
      3.31
      Male
      No
      Sun
      Dinner
      2
    
    
      4
      24.59
      3.61
      Female
      No
      Sun
      Dinner
      4



In [13]:

    
# pd.crosstab() builds a cross-tabulation; by default each cell holds the
# number of occurrences (a count) of that day / party-size combination.
# 'size' is accessed with brackets because DataFrame.size is a built-in attribute.
party_counts = pd.crosstab(index=tips['day'], columns=tips['size'])
party_counts



In [14]:

    
# Grouped bar chart: one group per day, one bar per party size.
party_counts.plot.bar()









    Out[14]:





<matplotlib.axes._subplots.AxesSubplot at 0xa880a90>



In [15]:

    
# Keep only the party-size columns labelled 2 through 5.
# .loc slices by label and includes both endpoints; it replaces the
# deprecated .ix indexer, which has been removed from current pandas.
party_counts = party_counts.loc[:, 2:5]
party_counts.plot(kind = 'bar', stacked = True)









    Out[15]:





<matplotlib.axes._subplots.AxesSubplot at 0xa97feb8>



In [16]:

    
# Display the current party_counts table.
party_counts



In [17]:

    
# Divide every row by its own total so that each day's values become proportions.
party_counts = party_counts.div(
    party_counts.sum(axis=1),  # per-row totals
    axis='index',              # align the totals with the row labels
)
party_counts



In [18]:

    
# Row totals of party_counts (one value per day).
party_counts.sum(axis=1)









    Out[18]:





day
Fri     1.0
Sat     1.0
Sun     1.0
Thur    1.0
dtype: float64



In [19]:

    
# Stacked bar chart of party_counts, one bar per day.
party_counts.plot.bar(stacked=True)









    Out[19]:





<matplotlib.axes._subplots.AxesSubplot at 0xaa72080>

直方圖(histogram)和密度圖



In [20]:

    
# Read the tips dataset again (same file as loaded earlier in the notebook)
# and preview its first five rows.
tips = pd.read_csv('../data/tips.csv')
tips.head(5)









    Out[20]:






  
    
      
      total_bill
      tip
      sex
      smoker
      day
      time
      size
    
  
  
    
      0
      16.99
      1.01
      Female
      No
      Sun
      Dinner
      2
    
    
      1
      10.34
      1.66
      Male
      No
      Sun
      Dinner
      3
    
    
      2
      21.01
      3.50
      Male
      No
      Sun
      Dinner
      3
    
    
      3
      23.68
      3.31
      Male
      No
      Sun
      Dinner
      2
    
    
      4
      24.59
      3.61
      Female
      No
      Sun
      Dinner
      4



In [21]:

    
# A histogram can be drawn with plot(kind = 'hist').
tips['total_bill'].plot(kind='hist', bins=50)
plt.title('total_bill')









    Out[21]:





<matplotlib.text.Text at 0xab5d630>



In [22]:

    
# The hist() method draws a histogram as well.
tips['total_bill'].hist(bins=50)
plt.title('total_bill')









    Out[22]:





<matplotlib.text.Text at 0xac5cf98>



In [23]:

    
# Histogram of the tip ratio: tip divided by total bill, row by row.
tip_ratios = tips['tip'].div(tips['total_bill'])
tip_ratios.hist(bins=50)
plt.title('tip ratio')









    Out[23]:





<matplotlib.text.Text at 0xad86ef0>

散佈圖(scatter plot)



In [24]:

    
# Load the macro dataset and preview its first five rows.
macro = pd.read_csv('../data/macrodata.csv')
macro.head(5)



In [25]:

    
# Keep only the four columns used in the scatter plots below.
data = macro[[
    'cpi',
    'm1',
    'tbilrate',
    'unemp',
]]
data.head(5)



In [26]:

    
# diff() replaces each value with its difference from the value in the previous row.
trans_data = (
    np.log(data)   # natural log of every value
    .diff()        # change between consecutive rows
    .dropna()      # drop rows containing NaN (diff leaves the first row empty)
)
trans_data.head(5)



In [27]:

    
# plt.scatter() draws a scatter plot: one point per data row,
# positioned by that row's values in two columns.
plt.scatter(x=trans_data['m1'], y=trans_data['unemp'])
plt.title('Changes in log({0}) vs. log({1})'.format('m1', 'unemp'))









    Out[27]:





<matplotlib.text.Text at 0xaeacda0>



In [28]:

    
# The same scatter plot through the DataFrame plotting interface.
trans_data.plot(kind='scatter', x='m1', y='unemp')









    Out[28]:





<matplotlib.axes._subplots.AxesSubplot at 0x5952240>



In [29]:

    
# pandas provides scatter_matrix() to build scatter plots straight from a DataFrame;
# it automatically produces a scatter diagram for every pair of columns.
# The function lives in pandas.plotting; the old top-level pd.scatter_matrix
# alias was deprecated and is no longer available in current pandas.
pd.plotting.scatter_matrix(trans_data, color = 'k', alpha = 0.3)









    Out[29]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x0000000005977BE0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000000005A0FAC8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000000005A5F4E0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000000005A9A198>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x0000000005AE42B0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000000005C03860>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000000005C4CF60>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000000005C896A0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x0000000005CD88D0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000000005D22710>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x0000000005D67B38>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000AEE0D68>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000000000AF1CEF0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000AF6B9E8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000AFA71D0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000AFF4940>]], dtype=object)



In [ ]:

	A	B	C	D
70	0.185760	-0.418143	-1.567216	-3.965365
80	2.700475	-0.807974	-2.625121	-5.042702
90	3.168337	0.033208	-3.949831	-4.925446

Genus	A	B	C	D
one	0.220608	0.513349	0.946094	0.200548
two	0.846614	0.781777	0.814468	0.534386
three	0.375286	0.143810	0.365030	0.401326
four	0.554727	0.275458	0.071217	0.576424
five	0.048619	0.932931	0.496425	0.361190
six	0.758107	0.034670	0.466343	0.932895

size	2	3	4	5
day
Fri	0.888889	0.055556	0.055556	0.000000
Sat	0.623529	0.211765	0.152941	0.011765
Sun	0.520000	0.200000	0.240000	0.040000
Thur	0.827586	0.068966	0.086207	0.017241

	year	quarter	realgdp	realcons	realinv	realgovt	realdpi	cpi	m1	tbilrate	unemp	pop	infl	realint
0	1959.0	1.0	2710.349	1707.4	286.898	470.045	1886.9	28.98	139.7	2.82	5.8	177.146	0.00	0.00
1	1959.0	2.0	2778.801	1733.7	310.859	481.301	1919.7	29.15	141.7	3.08	5.1	177.830	2.34	0.74
2	1959.0	3.0	2775.488	1751.8	289.226	491.260	1916.4	29.35	140.5	3.82	5.3	178.657	2.74	1.09
3	1959.0	4.0	2785.204	1753.7	299.356	484.052	1931.3	29.37	140.0	4.33	5.6	179.386	0.27	4.06
4	1960.0	1.0	2847.699	1770.5	331.722	462.199	1955.5	29.54	139.6	3.50	5.2	180.007	2.31	1.19

	cpi	m1	tbilrate	unemp
0	28.98	139.7	2.82	5.8
1	29.15	141.7	3.08	5.1
2	29.35	140.5	3.82	5.3
3	29.37	140.0	4.33	5.6
4	29.54	139.6	3.50	5.2

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3
3	23.68	3.31	Male	No	Sun	Dinner	2
4	24.59	3.61	Female	No	Sun	Dinner	4

	cpi	m1	tbilrate	unemp
1	0.005849	0.014215	0.088193	-0.128617
2	0.006838	-0.008505	0.215321	0.038466
3	0.000681	-0.003565	0.125317	0.055060
4	0.005772	-0.002861	-0.212805	-0.074108
5	0.000338	0.004289	-0.266946	0.000000